/*==============================================================================
FILE NAME: Air_panel_with_referred.do
INPUT: incidents.dta, Air_Panel.dta
OUTPUT: Air_panel_with_referred.dta
CREATED: 2 November 2022
UPDATED: 12 September 2024
==============================================================================*/

/* Set directory if working independently through code
if c(username)=="" { //insert username
	global rootdir "" // insert root path
	global processed_data "$rootdir/processed_data"  // Define global paths for replication package
} 
*/

// -----------------------------------------------------------------------------
// Step 1: build RN-by-month counts of referred complaint incidents
// -----------------------------------------------------------------------------
// Start from the incident file and discard every record whose status is not
// exactly "REFERRED".
use "$processed_data/incidents.dta", clear
drop if IncidentStatus != "REFERRED"

// RN_id is RegulatedEntity with its first two characters removed, converted
// to numeric so it can serve as a merge key.
gen RN_id = substr(RegulatedEntity, 3, .)
label var RN_id "same as RN without 'RN'"
destring RN_id, replace

// Shorter name for the complaint incident identifier.
rename ComplaintIncident CIN

// Parse the received-date string (month-day-year) into a Stata daily date.
// Negative daily dates (i.e. before 01jan1960) are treated as invalid and
// set to missing.
gen incident_day = date(IncidentRecDate, "MDY")
replace incident_day = . if incident_day < 0

// Monthly date of receipt; mofd() of a missing daily date is missing.
gen mdate = mofd(incident_day)
format mdate %tm

// Sample window: January 2003 through December 2019. inrange() is false for
// a missing mdate, so records without a usable date are dropped here too.
keep if inrange(mdate, tm(2003m1), tm(2019m12))

// Each remaining record counts as one referred complaint.
gen referred = 1

// Only the identifier, the panel keys, and the counter are needed from here on.
keep CIN RN_id mdate referred

// Remove exact duplicate rows, then verify that CIN uniquely identifies
// observations (isid aborts the do-file otherwise).
duplicates drop
isid CIN

// Number of referred complaints per RN per month.
sort RN_id mdate
collapse (sum) referred, by(RN_id mdate)
label var referred "# of referred complaint incidents against RN in month"
sort RN_id mdate

// -----------------------------------------------------------------------------
// Step 2: attach the referred-complaint counts to the main Air_Panel data
// -----------------------------------------------------------------------------
// Master = the RN-by-month referred counts currently in memory;
// using  = Air_Panel.dta. Both must be unique on (RN_id, mdate).
merge 1:1 RN_id mdate using "$processed_data/Air_Panel.dta"

// Diagnostic only: number of distinct RN_id among matched rows.
// NOTE: -unique- is a community-contributed command (ssc install unique).
unique RN_id if _merge == 3

// _merge==1 marks master-only rows, i.e. referred-complaint RN-months that
// have no counterpart in Air_Panel; those are discarded. Air_Panel rows
// without any referred complaint (_merge==2) are kept.
keep if _merge != 1
drop _merge

// Air_Panel rows that had no referred complaints carry a missing count;
// recode it to zero, then flag RN-months with at least one referred complaint.
replace referred = 0 if referred == .
gen p_referred = referred > 0
label var p_referred "=1 if RN had at least one referred complaint incident in month"
sort RN_id mdate

// -----------------------------------------------------------------------------
// Step 3: prepare the panel for local projection (LP) models and save
// -----------------------------------------------------------------------------
// Drop rows flagged never_air_inv==1 (flag comes from Air_Panel.dta;
// presumably RNs never investigated for air -- confirm against that file).
drop if never_air_inv == 1

// Panel time index: consecutive integers over the (year, month) combinations
// present in the data.
// NOTE(review): group() compresses calendar gaps, so if any month were absent
// from the data the leads/lags below would span more than one calendar month.
// Confirm every month is present, or consider xtset on mdate instead.
egen t = group(year month)
xtset RN_id t

// For every outcome y build, relative to its value one period back (L1.y):
//   y_h    = F(h).y - L1.y   for h = 0,...,12   (leads)
//   y_negh = L(h).y - L1.y   for h = 2,...,12   (lags)
foreach y of varlist p_air_complaint_inv p_air_nocomplaint_inv p_air_inv p_air_nov p_air_noe {
	forvalues h = 0/12 {
		gen `y'_`h' = F`h'.`y' - L1.`y'
	}
	forvalues h = 2/12 {
		gen `y'_neg`h' = L`h'.`y' - L1.`y'
	}
}

// Group identifier for RN-by-year cells.
// NOTE(review): RN is not created in this do-file; it must come from
// Air_Panel.dta (or resolve through variable abbreviation) -- confirm.
egen RN_year = group(RN year)

// Keep only observations for which every lead (0-12) and every lag (2-12)
// difference of p_air_inv is non-missing.
forvalues h = 0/12 {
	drop if missing(p_air_inv_`h')
}
forvalues h = 2/12 {
	drop if missing(p_air_inv_neg`h')
}

// Save the final panel dataset
save "$processed_data/Air_panel_with_referred.dta", replace